This is a report of the analysis of gapminder_clean.csv data.
# Keep only the observations recorded in 1962.
mydata_1962 <- mydata %>%
  filter(Year == 1962)

# Pearson correlation test between CO2 emissions per capita and gdpPercap
# in 1962. This is the call whose output is reported directly below.
cor.test(
  mydata_1962$`CO2.emissions.(metric.tons.per.capita)`,
  mydata_1962$gdpPercap
)
##
## Pearson's product-moment correlation
##
## data: mydata_1962$`CO2.emissions.(metric.tons.per.capita)` and mydata_1962$gdpPercap
## t = 25.269, df = 106, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.8934697 0.9489792
## sample estimates:
## cor
## 0.9260817
The correlation between ‘CO2 emissions (metric tons per capita)’ and gdpPercap in 1962 is 0.9260817.
The associated p-value is below 2.2e-16.
# Correlation between CO2 emissions per capita and gdpPercap, computed
# separately for every year present in the data.
all_years <- unique(mydata$Year)

# Correlation for a single year. Rows with a missing value in either
# variable are excluded (use = "complete.obs").
cor_co2_gdp_for_year <- function(year) {
  year_data <- mydata %>%
    filter(Year == year)
  cor(
    year_data$`CO2.emissions.(metric.tons.per.capita)`,
    year_data$gdpPercap,
    use = "complete.obs"
  )
}

# Build the result in one step instead of appending one row per iteration.
# Row names stay 1..n in the order of all_years, so the row labels shown in
# the sorted table still identify the original position of each year.
year_cor_co2_gdp <- data.frame(
  Year = as.numeric(all_years),
  Correlation = vapply(
    all_years, cor_co2_gdp_for_year, numeric(1),
    USE.NAMES = FALSE
  )
)

# Show the years ordered from the strongest to the weakest correlation.
year_cor_co2_gdp[order(year_cor_co2_gdp$Correlation, decreasing = TRUE), ] %>%
  kbl() %>%
  kable_material(c("striped", "hover"))
| Year | Correlation | |
|---|---|---|
| 2 | 1967 | 0.9387918 |
| 1 | 1962 | 0.9260817 |
| 3 | 1972 | 0.8428986 |
| 5 | 1982 | 0.8166384 |
| 6 | 1987 | 0.8095531 |
| 7 | 1992 | 0.8094316 |
| 8 | 1997 | 0.8081396 |
| 9 | 2002 | 0.8006421 |
| 4 | 1977 | 0.7928336 |
| 10 | 2007 | 0.7204169 |
The correlation between ‘CO2 emissions (metric tons per capita)’ and gdpPercap is the strongest in the year 1967.
# One-way ANOVA of per-capita energy use by continent, fitted on the
# mydata_1967 data frame; summary() prints the ANOVA table.
one_way_anova_1967 <- aov(
  formula = `Energy.use.(kg.of.oil.equivalent.per.capita)` ~ continent,
  data = mydata_1967
)
summary(one_way_anova_1967)
## Df Sum Sq Mean Sq F value Pr(>F)
## continent 3 30161255 10053752 9.642 0.000334 ***
## Residuals 21 21895723 1042653
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 234 observations deleted due to missingness
The p-value is low (p < 0.001), so energy use appears to differ between continents.
# Restrict the data to Europe and Asia in the years after 1990.
mydata_1990 <- mydata %>%
  filter(Year > 1990) %>%
  filter(continent %in% c("Europe", "Asia"))

# One-way ANOVA of imports (% of GDP) by continent. The model is fitted on
# mydata_1990 (the Europe/Asia subset built above), not on mydata_1967.
# NOTE: the ANOVA table printed below reports 4 degrees of freedom for
# continent, i.e. it was produced from data covering five continents, and
# has to be regenerated from this corrected call.
one_way_anova_1990 <- aov(
  `Imports.of.goods.and.services.(%.of.GDP)` ~ continent,
  data = mydata_1990
)
summary(one_way_anova_1990)
## Df Sum Sq Mean Sq F value Pr(>F)
## continent 4 536 133.9 0.413 0.799
## Residuals 83 26923 324.4
## 171 observations deleted due to missingness
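Because the p-value is high (> 0.05), it is concluded that there is no significant difference between Europe and Asia with respect to ‘Imports of goods and services (% of GDP)’ in the years after 1990. Note, however, that the ANOVA table above reports 4 degrees of freedom for continent, which means it compares more than two continents; this conclusion should be re-checked against the Europe/Asia subset.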
# Average population density per country across all available years.
all_countries <- unique(mydata$Country.Name)

# Mean population density of one country, ignoring missing values.
mean_pop_density <- function(country) {
  country_data <- mydata %>%
    filter(Country.Name == country)
  mean(
    as.numeric(
      country_data$`Population.density.(people.per.sq..km.of.land.area)`
    ),
    na.rm = TRUE
  )
}

# Build the table in one step so the averages stay numeric throughout
# (no detour through character values, no row-by-row growth).
# USE.NAMES = FALSE keeps the default 1..n row names, which identify the
# position of each country in all_countries in the printed table.
pop_dens_avg <- data.frame(
  Country = as.character(all_countries),
  Average.population.density = vapply(
    as.character(all_countries), mean_pop_density, numeric(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)

# Rank countries from the highest to the lowest average density and show
# the top of the ranking.
pop_dens_avg <- pop_dens_avg[
  order(pop_dens_avg$Average.population.density, decreasing = TRUE),
]
head(pop_dens_avg) %>%
  kbl() %>%
  kable_material(c("striped", "hover"))
| Country | Average.population.density | |
|---|---|---|
| 145 | Macao SAR, China | 14732.035 |
| 163 | Monaco | 14089.900 |
| 101 | Hong Kong SAR, China | 5153.057 |
| 209 | Singapore | 4361.500 |
| 88 | Gibraltar | 2622.250 |
| 23 | Bermuda | 1132.780 |
Macao SAR, China, has the highest average ‘Population density (people per sq. km of land area)’ across all years: 14732.035.
## The first year of measurement was 1962 and the last one was 2007.
# Change in life expectancy at birth between 1962 and 2007, per country.
mydata_2007 <- mydata %>%
  filter(Year == 2007)

# Look up each country's life expectancy in both years. match() returns NA
# for a country without a row in that year, so such countries get NA for
# both measures; if a country had several rows, the first one is used.
life_exp_1962 <- mydata_1962$`Life.expectancy.at.birth,.total.(years)`[
  match(all_countries, mydata_1962$Country.Name)
]
life_exp_2007 <- mydata_2007$`Life.expectancy.at.birth,.total.(years)`[
  match(all_countries, mydata_2007$Country.Name)
]

# Life.exp.increase.numerical: absolute gain in years.
# Life.exp.increase.percentage: gain relative to the 1962 value, in percent
# (a doubling of life expectancy is an increase of 100%).
# NOTE: the tables rendered in this report show 2007 / 1962 * 100 in the
# percentage column (100 higher than the increase) and need regenerating.
exp_increase <- data.frame(
  Country = as.character(all_countries),
  Life.exp.increase.numerical = life_exp_2007 - life_exp_1962,
  Life.exp.increase.percentage = round(
    (life_exp_2007 / life_exp_1962 - 1) * 100,
    digits = 1
  ),
  stringsAsFactors = FALSE
)

# Countries with the largest absolute gain in life expectancy.
head(
  exp_increase[
    order(exp_increase$Life.exp.increase.numerical, decreasing = TRUE),
  ]
) %>%
  kbl() %>%
  kable_material(c("striped", "hover"))
| Country | Life.exp.increase.numerical | Life.exp.increase.percentage | |
|---|---|---|---|
| 150 | Maldives | 36.91615 | 195.9 |
| 24 | Bhutan | 33.19895 | 200.3 |
| 238 | Timor-Leste | 31.08515 | 189.5 |
| 242 | Tunisia | 30.86076 | 171.2 |
| 182 | Oman | 30.82310 | 169.6 |
| 171 | Nepal | 30.59963 | 185.1 |
# Same table, ranked by the Life.exp.increase.percentage column (largest
# first); only the first rows returned by head() are shown.
rank_by_percentage <- order(
  exp_increase$Life.exp.increase.percentage,
  decreasing = TRUE
)
exp_increase[rank_by_percentage, ] %>%
  head() %>%
  kbl() %>%
  kable_material(c("striped", "hover"))
| Country | Life.exp.increase.numerical | Life.exp.increase.percentage | |
|---|---|---|---|
| 24 | Bhutan | 33.19895 | 200.3 |
| 150 | Maldives | 36.91615 | 195.9 |
| 151 | Mali | 25.71346 | 190.1 |
| 238 | Timor-Leste | 31.08515 | 189.5 |
| 171 | Nepal | 30.59963 | 185.1 |
| 84 | Gambia, The | 25.90834 | 179.3 |
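In the Maldives, life expectancy grew by 37 years between 1962 and 2007, reaching 196% of its 1962 value (an increase of about 96%). In Bhutan, life expectancy grew by 33 years, reaching just over 200% of its 1962 value, i.e. it roughly doubled.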